************************************************************************************************
* This .do file manages the data and performs regressions associated with "Legal Protection Against Retaliatory Firing Improves
* Workplace Safety" by Matthew S. Johnson, Daniel Schwab, and Patrick Koval, forthcoming in Review of Economics and Statistics. Email addresses are matthew.johnson@duke.edu, dschwab@holycross.edu, patrickkoval@gmail.com.
************************************************************************************************



clear all
set maxvar 32767
set matsize 2000
set more off
//ssc install reghdfe
//ssc install tuples  
cap log close

// This is just to time how long the .do file takes to run and label the log file 
global hrs = substr("$S_TIME",1,2)
di "$hrs"
global mins = substr("$S_TIME",4,2)
di "$mins"
global sec = substr("$S_TIME",7,2)
di "$sec"


*Dan Mac laptop
cd /Users/dschwab/Dropbox/wrongful_discharge/replication_May_2022/intermediate_dtas
global name DS

*Dan Mac desktop
//cd /Users/dschwab/Dropbox/wrongful_discharge/intermediate_dtas

*Matt Duke computer
//cd "C:\Users\msj22\Dropbox\research\wrongful_discharge\replication_May_2022/intermediate_dtas"

log using "logs/wdl_log$S_DATE $hrs $mins $sec.smcl", replace

global policies = ""

timer clear
timer on 1


**********************************************************************************************************************************************************
*Part 1: Creating .dta's that we'll use, doing some minor cleaning
**********************************************************************************************************************************************************	

* Read an Excel sheet with NSC death rates and convert it to a .dta

if 2==0 {
	use ../data/state_codes, clear
	save state_codes, replace
}



if 2==0 { 

	* We are using this spreadsheet only to know which observations represent a partial year
	import excel using "../data/Work deaths by state 1970-2000.xlsx", clear firstrow
	drop if _n==1
	*The variable f`year' gives the number of months on which the value is based. It's 0 for almost all years (indicating a full year).
		foreach i of numlist 1970/2000 {
			replace f`i' = 12 if f`i'==0
		}
		
	rename A state
	drop if state == "Dist. Of Columbia"
	reshape long y f, i(state) j(year)
	gen multiplier = 12/f
	sort state year
	keep state year multiplier
	save NSC_multiplier, replace
	
	import excel using ../data/nsc-corrected-4-1-20/nsc1970.xlsx, firstrow clear //NSC data is in one excel file per year so we have to append it all into one .dta. This is just pulling in 1970.
	destring(totalRaw-unclassRate), force replace
	save nsc, replace

	foreach i of numlist 1971/2000 { 
		import excel using ../data/nsc-corrected-4-1-20/nsc`i'.xlsx, firstrow clear
		destring(totalRaw-unclassRate), force replace
		append using nsc
		drop if stAbbrev == "DC" | stAbbrev == "PR" | stAbbrev == "VI" //Drop DC, Puerto Rico, Virgin Islands
		save nsc, replace
	}
	
	drop if missing(year)
	drop Q-Z
	
	drop stAbbrev stFips comments
	rename stateName state
	
	sort state year
	merge m:1 state using state_codes
	cap drop _merge
	
	sort state year
	merge 1:1 state year using NSC_multiplier
	cap drop _merge
	
	gen homePublicRaw = homeRaw + publicNonMotorRaw
	
	foreach v in workRaw homePublicRaw publicNonMotorRaw homeRaw totalRaw motorVehicleRaw unclassRaw {
		replace `v' = `v' * multiplier // If the multiplier is 8 for example, that the data only includes 8 months of coverage. In that case we multiply by 12/8 to get a yearly rate.
	}
	
	rename workRaw deaths
	drop if missing(year)
	save nsc, replace
}

* State population estimates from Census, downloaded from NBER (March 2021): https://www.nber.org/research/data/us-intercensal-population-county-and-state-1970. This is used in the calculation of non-work accidental death rates
if 2==0 {
	use ../data/county_population, clear
	keep if county_fips == 0
	drop if state_fips == 0 //USA
	keep areaname pop1970-pop2000

	reshape long pop, i(areaname) j(year)

	drop if areaname == "District of Columbia"
	rename areaname state
	
	sort state year
	save pop, replace
}


* This section creates CES2003.dta. It is monthly employment data from CES covering 2003 and beyond. It will be appended to employment_data_1970.txt, which is the same data for 1970 - 2002.

if 2==0 { 
	import excel seriesid year period value using ../data/state_emp_1990_2017_descr.xlsx, clear //Total nonfarm employment at the state level from BLS
	drop if substr(seriesid,1,3) ~= "SMS"

	//The next 6 lines create a variable "seriesid" that matches the seriesid variable found in employment_data_1970.txt. The variable seriesid includes info on state, sector, year, and month.
	gen st = substr(seriesid,4,2) 
	gen first = "0" if substr(seriesid,11,1) == "0"
	replace first = "3" if substr(seriesid,11,1) == "3"
	gen second = "01" if substr(seriesid,11,1) == "0"
	replace second = "11" if substr(seriesid,11,1) == "3"
	replace seriesid = "SMS" + st + "0000" + first + "0000" + second
	
	destring(year), replace
	drop if year <= 2002 | year >= 2017 
	drop first second st
	save CES2003, replace
}

* This section creates CES, which is monthly employment data from 1970 - 2002
if 2==0 {
	import excel using ../data/state_codes.xlsx, firstrow clear //this file has state_code (FIPS) and state name
	rename state_code state_code
	save state_names2, replace

	insheet using ../data/employment_data_1970.txt, clear
	append using CES2003

	*want to break up the series_id into its components
	generate state_code = substr(seriesid,4,2)
	generate str20 state ="."
	generate industry_code = substr(seriesid,10,6)
	rename period month
	replace month = substr(month,2,2)
	destring(month), replace

	*attach state names to the states
	destring(state_code), replace
	sort state_code
	merge m:1 state_code using state_names2
	assert _merge==3
	drop _merge
	drop state

	*want to reshape data to seperate total and manufacturing values
	drop seriesid
	reshape wide value, i(state_name month year) j(industry_code) string

	*rename and order variables 
	rename value000000 total_employees
	rename value300001 manu_employees
	rename state_name state
	order state, first
	order year, before (month)
	sort state year month

	replace total = "6182.7" if state == "Florida" & month == 12 & year == 1995  //These four observations have footnotes that break the import process
	replace total = "1946.5" if state == "Louisiana" & month == 12 & year == 2002
	replace total = "363.9" if state == "New Hampshire" & month == 1 & year == 1979 
	replace manu = "10.6" if state == "Wyoming" & month == 1 & year == 1996

	destring total, replace force
	destring manu, replace force
	 
	//Original data has construction = 1, manu = 2, other = 3, total = 4 
	bys state year: egen empY4 = mean(total)	//empY4 refers to employment at the year level, in the "total" category  
	bys state year: egen empY2 = mean(manu)		//empY2 refers to employment at the year level, in the "manu" category  
	gen empY5 = empY4 - empY2 					//creating category 5, which is non-manu

	gen quarter = floor((month-1)/3)+1
	bys state year quarter: egen empQ4 = mean(total) 	//this is total employment at the quarter level
	bys state year quarter: egen empQ2 = mean(manu)		//this is manu employment at the quarter level

	gen empQ5 = empQ4-empQ2								//this is non-manu employment at the quarter level
	gen yq = (year - 1960)*4 + quarter - 1				//the total number of quarters that have elapsed since 1960q1

	reshape long empQ, i(state year month) j(sector_coarse)

	gen empY = .										
	foreach n of numlist 2 4 5 {
		replace empY = empY`n' if sec == `n'
		drop empY`n'
	}

	drop if year >= 2011
	sort state year quarter sec

	save CES, replace

}

* Some minor data preparation for state unemployment from BLS
if 2==0 { 
	clear all
	file open myfile2 using ../data/state_unemployment_v2, read text 
	file read myfile2 line
	set obs 1
	global n 1
	gen year = ""
	gen state = ""
	gen unemp = ""
	
	while r(eof) == 0 {
		if substr("`line'",1,5) == "Area:" {
			global state = substr("`line'",44,100)
		}
		if substr("`line'",1,1) == "1" | substr("`line'",1,1) == "2" { //this is a line that starts with a year so we get year and unemployment level
			replace year = substr("`line'",1,4) if _n == $n
			replace state = "$state" if _n == $n
			replace unemp = substr("`line'",6,4) if _n == $n	
			global n = $n+1
			set obs $n
		} 
		file read myfile2 line
	}
	drop if _n == $n //this is the last empty line
	destring year, force replace
	drop if year >= 2012 
	destring unemp, replace	
	
	sort state
	merge m:1 state using state_codes
	drop if _merge == 1 //DC
	cap drop _merge
	save unemployment, replace
}


**********************************************************************************************************************************************************
*Part 2: creating the main dataset
**********************************************************************************************************************************************************


if 2==0 { 
	use state_codes, clear //contains state name, 2 letter abbreviation, census division, state number (1-50, alphabetical)
	sort state
	save state_codes, replace

	use ../data/state_panel, clear //This is an empty monthly panel of states

	sort state
	merge state using state_codes
	drop _merge

 * We use WDL data from Autor et al. (2006), "The costs of wrongful discharge laws"
	gen monyear = month/100 + year 
	generate GoodFaith=0 
	replace GoodFaith=1 if monyear >= 1983.05 & state == "Alaska"	
	replace GoodFaith=1 if monyear >= 1985.06 & state == "Arizona"
	replace GoodFaith=1 if monyear >= 1980.10 & state == "California"
	replace GoodFaith=1 if monyear >= 1980.06 & state == "Connecticut"
	replace GoodFaith=1 if monyear >= 1992.04 & state == "Delaware"
	replace GoodFaith=1 if monyear >= 1989.08 & state == "Idaho"
	replace GoodFaith=1 if monyear >= 1977.07 & state == "Massachusetts"
	replace GoodFaith=1 if monyear >= 1982.01 & state == "Montana"
	replace GoodFaith=1 if monyear >= 1987.02 & state == "Nevada"
	replace GoodFaith=1 if monyear >= 1974.02 & monyear <= 1980.05 & state == "New Hampshire"
	replace GoodFaith=1 if monyear >= 1985.05 & monyear <= 1989.02 & state == "Oklahoma"	
	replace GoodFaith=1 if monyear >= 1994.01 & state == "Wyoming"
	generate PublicPolicy=0 
	replace PublicPolicy=1 if monyear >= 1986.02 & state == "Alaska"
	replace PublicPolicy=1 if monyear >= 1985.06 & state == "Arizona"	
	replace PublicPolicy=1 if monyear >= 1980.03 & state == "Arkansas"
	replace PublicPolicy=1 if monyear >= 1969.01 & state == "California"
	replace PublicPolicy=1 if monyear >= 1985.09 & state == "Colorado"	
	replace PublicPolicy=1 if monyear >= 1980.01 & state == "Connecticut"
	replace PublicPolicy=1 if monyear >= 1992.03 & state == "Delaware"
	replace PublicPolicy=1 if monyear >= 1982.10 & state == "Hawaii"
	replace PublicPolicy=1 if monyear >= 1977.04 & state == "Idaho"
	replace PublicPolicy=1 if monyear >= 1978.12 & state == "Illinois"
	replace PublicPolicy=1 if monyear >= 1973.05 & state == "Indiana"
	replace PublicPolicy=1 if monyear >= 1985.07 & state == "Iowa"
	replace PublicPolicy=1 if monyear >= 1981.06 & state == "Kansas"
	replace PublicPolicy=1 if monyear >= 1983.11 & state == "Kentucky"
	replace PublicPolicy=1 if monyear >= 1981.07 & state == "Maryland"
	replace PublicPolicy=1 if monyear >= 1980.05 & state == "Massachusetts"
	replace PublicPolicy=1 if monyear >= 1986.11 & state == "Minnesota"
	replace PublicPolicy=1 if monyear >= 1976.06 & state == "Michigan"
	replace PublicPolicy=1 if monyear >= 1987.07 & state == "Mississippi"
	replace PublicPolicy=1 if monyear >= 1985.11 & state == "Missouri"
	replace PublicPolicy=1 if monyear >= 1980.01 & state == "Montana"
	replace PublicPolicy=1 if monyear >= 1987.11 & state == "Nebraska"
	replace PublicPolicy=1 if monyear >= 1984.01 & state == "Nevada"
	replace PublicPolicy=1 if monyear >= 1974.02 & state == "New Hampshire"
	replace PublicPolicy=1 if monyear >= 1980.07 & state == "New Jersey"
	replace PublicPolicy=1 if monyear >= 1983.07 & state == "New Mexico"
	replace PublicPolicy=1 if monyear >= 1985.05 & state == "North Carolina"
	replace PublicPolicy=1 if monyear >= 1987.11 & state == "North Dakota"
	replace PublicPolicy=1 if monyear >= 1990.03 & state == "Ohio"
	replace PublicPolicy=1 if monyear >= 1989.02 & state == "Oklahoma"
	replace PublicPolicy=1 if monyear >= 1975.06 & state == "Oregon"
	replace PublicPolicy=1 if monyear >= 1974.03 & state == "Pennsylvania"
	replace PublicPolicy=1 if monyear >= 1985.11 & state == "South Carolina"
	replace PublicPolicy=1 if monyear >= 1988.12 & state == "South Dakota"
	replace PublicPolicy=1 if monyear >= 1984.08 & state == "Tennessee"
	replace PublicPolicy=1 if monyear >= 1984.06 & state == "Texas"
	replace PublicPolicy=1 if monyear >= 1989.03 & state == "Utah"
	replace PublicPolicy=1 if monyear >= 1986.09 & state == "Vermont"
	replace PublicPolicy=1 if monyear >= 1985.06 & state == "Virginia"
	replace PublicPolicy=1 if monyear >= 1984.07 & state == "Washington"
	replace PublicPolicy=1 if monyear >= 1978.07 & state == "West Virginia"
	replace PublicPolicy=1 if monyear >= 1980.01 & state == "Wisconsin"
	replace PublicPolicy=1 if monyear >= 1989.07 & state == "Wyoming"
	generate ImpliedContract=0 
	replace ImpliedContract=1 if monyear >= 1987.07 & state == "Alabama"
	replace ImpliedContract=1 if monyear >= 1983.05 & state == "Alaska"
	replace ImpliedContract=1 if monyear >= 1983.06 & monyear <= 1984.04 & state == "Arizona"
	replace ImpliedContract=1 if monyear >= 1984.06 & state == "Arkansas"
	replace ImpliedContract=1 if monyear >= 1972.03 & state == "California"
	replace ImpliedContract=1 if monyear >= 1983.10 & state == "Colorado"
	replace ImpliedContract=1 if monyear >= 1985.10 & state == "Connecticut"
	replace ImpliedContract=1 if monyear >= 1986.08 & state == "Hawaii"
	replace ImpliedContract=1 if monyear >= 1977.04 & state == "Idaho"
	replace ImpliedContract=1 if monyear >= 1974.12 & state == "Illinois"
	replace ImpliedContract=1 if monyear >= 1987.08 & state == "Indiana"
	replace ImpliedContract=1 if monyear >= 1987.11 & state == "Iowa"
	replace ImpliedContract=1 if monyear >= 1984.08 & state == "Kansas"
	replace ImpliedContract=1 if monyear >= 1983.08 & state == "Kentucky"
	replace ImpliedContract=1 if monyear >= 1977.11 & state == "Maine"
	replace ImpliedContract=1 if monyear >= 1985.01 & state == "Maryland"
	replace ImpliedContract=1 if monyear >= 1988.05 & state == "Massachusetts"
	replace ImpliedContract=1 if monyear >= 1983.04 & state == "Minnesota"
	replace ImpliedContract=1 if monyear >= 1980.06 & state == "Michigan"
	replace ImpliedContract=1 if monyear >= 1992.06 & state == "Mississippi"
	replace ImpliedContract=1 if monyear >= 1983.01 & monyear <= 1988.02 & state == "Missouri"
	replace ImpliedContract=1 if monyear >= 1987.06 & state == "Montana"
	replace ImpliedContract=1 if monyear >= 1983.11 & state == "Nebraska"
	replace ImpliedContract=1 if monyear >= 1983.08 & state == "Nevada"
	replace ImpliedContract=1 if monyear >= 1985.05 & state == "New Jersey"
	replace ImpliedContract=1 if monyear >= 1980.02 & state == "New Mexico"
	replace ImpliedContract=1 if monyear >= 1982.11 & state == "New York"
	replace ImpliedContract=1 if monyear >= 1984.02 & state == "North Dakota"
	replace ImpliedContract=1 if monyear >= 1982.04 & state == "Ohio"
	replace ImpliedContract=1 if monyear >= 1976.12 & state == "Oklahoma"
	replace ImpliedContract=1 if monyear >= 1978.03 & state == "Oregon"
	replace ImpliedContract=1 if monyear >= 1983.04 & state == "South Dakota"
	replace ImpliedContract=1 if monyear >= 1981.11 & state == "Tennessee"
	replace ImpliedContract=1 if monyear >= 1985.04 & state == "Texas"
	replace ImpliedContract=1 if monyear >= 1986.05 & state == "Utah"
	replace ImpliedContract=1 if monyear >= 1985.08 & state == "Vermont"
	replace ImpliedContract=1 if monyear >= 1983.09 & state == "Virginia"
	replace ImpliedContract=1 if monyear >= 1977.08 & state == "Washington"
	replace ImpliedContract=1 if monyear >= 1986.04 & state == "West Virginia"
	replace ImpliedContract=1 if monyear >= 1985.08 & state == "Wyoming"
	
	drop monyear
	
	rename GoodFaith wdlag
	rename PublicPolicy wdlap
	rename ImpliedContract wdlac

	save EPL_monthly$name, replace	

 * Next step: merge WDL with labor force. 
	use CES, clear 
	
	replace sector_coarse = 1 if sector_coarse==4 // Overall
	replace sector_coarse = 2 if sector_coarse==2 // Manufacturing
	replace sector_coarse = 3 if sector_coarse==5 // Non-manufacturing

	label define sector_coarse_labs 1 "Overall" 2 "Manufacturing" 3 "Non-manufacturing"
	label values sector_coarse sector_coarse_labs
	
	sort state year month sector_coarse
	drop if regexm(state, "Puerto") | regexm(state, "Virgin Island") | regexm(state, "District of Columbia")
	
 * Merge in monthly EPL data
	sort state year month
	cap drop _merge
	merge m:1 state year month using EPL_monthly$name	
	drop _merge

 * Next merge in the state unemployment data. The raw data for unemployment is saved in wrongful_discharge/data/state_unemployment_v2, downloaded from BLS. There is a small amount of data cleaning above.
	sort state2 year
	merge m:1 state2 year using unemployment

	drop if _merge == 2 //drops 2011 data (years that are only in unemployment data, not main dataset) 
	cap drop _merge 			
	
	global wdl "ag ac ap"
	
	*Create a quarterly and yearly averages for WDL
		foreach e in $wdl {
			bys state year quarter: egen wdl`e'Q = mean(wdl`e') //wdl`e' is monthly wdl. wdl`e'Q is quarterly and wdl`e'Y is yearly.
			bys state year: egen wdl`e'Y = mean(wdl`e')			
		}
		keep if month == 1 | month == 4 | month == 7 | month == 10 //Limiting the data to quarterly. 
		save EPLpopA$name, replace

		
		
*************************
*Data on % unionized by state, so that we can construct unionized employment 
***************************
*First, the data with state union density by sector 
*Note: these data begin in 1983
	forvalues i = 1983/2007 {
		import excel "..\data\unionstats\State U_`i'.xls", sheet("State `i'") cellrange(A4:I259) firstrow clear
		keep if inlist(Sector, "Total", "Private")==1
		keep Code State Sector Mem
		reshape wide Mem, i(State) j(Sector) string
		gen year = `i'
		tempfile union`i'
		save `union`i'', replace
	}
	forvalues i = 2008/2009 {
		import excel "..\data\unionstats\State_U_`i'.xls", sheet("State `i'") cellrange(A4:I259) firstrow clear
		keep if inlist(Sector, "Total", "Private")==1
		keep Code State Sector Mem
		reshape wide Mem, i(State) j(Sector) string
		gen year = `i'
		tempfile union`i'
		save `union`i'', replace
	}
	forvalues i = 2010/2016 {
		import excel "..\data\unionstats\State_U_`i'.xlsx", sheet("State `i'") cellrange(A4:I259) firstrow clear
		keep if inlist(Sector, "Total", "Private")==1
		keep Code State Sector Mem
		reshape wide Mem, i(State) j(Sector) string
		gen year = `i'
		tempfile union`i'
		save `union`i'', replace
	}


	*Append data together 
		u `union1983', clear
		forvalues i = 1984 / 2016 {
			append using `union`i''
		}
	*Merge in state IDs
		drop Code
		replace State = "DISTRICT OF COLUMBIA" if State=="D.C."
		ssc install statastates, replace
		statastates, name(State)
//		statastates, name(State) nogen
		drop State state_fips
		rename state_abbrev state2 
		order state2 year MemPrivate MemTotal
		
		tempfile pubpriv
		save `pubpriv', replace 


*Next, the data with aggregate state union density (this dataset goes back further to 1964)
		import excel "../data/unionstats/State_Union_Membership_Density_1964-2016.xlsx", sheet("%Members 1964-2016") cellrange(A1:BD53) clear first	
		drop if StateName=="All States"
		drop StateName StateID
		forvalues i= 0/9 {
			rename Mem0`i' Mem200`i'
		}
		forvalues i= 0/6 {
			rename Mem1`i' Mem201`i'
		}
		forvalues i= 64/99 {
			rename Mem`i' Mem19`i'
		}
		reshape long Mem, i(state_code) j(year)
		rename Mem pct_union_mem
		rename state_code state2

	*Merge in the data by on unionization rates by sector
		merge 1:1 state2 year using `pubpriv', nogen
		
		
*Extrapolate the % unionized in the private sector in the years before 1983
*For each state, calculate the time trend in the difference between the % unionized in total versus private, and use this trend to extrapolate earlier years 
	gen total_priv_diff = MemPrivate / MemTotal
	
	levelsof state2, local(states)
	foreach state in `states' {
		reg total_priv_diff year if state2=="`state'"
		predict total_priv_diff_pred if state2=="`state'"
		replace MemPrivate = pct_union_mem*total_priv_diff_pred if state2=="`state'" & year<=1982
		drop total_priv_diff_pred 
	}
		
	drop MemTotal total_priv_diff
	
	rename MemPrivate pct_union_mem_private 

*Save to merge into main dataset 
		tempfile pct_union_state
		save `pct_union_state', replace	
		
 * Merge the EPL data with OSHA inspection data
	use ../data/complaint_accident_insps_state_year, clear //This is the inspection data from OSHA. It is quarterly.
	rename state state2
	merge m:1 state2 year using `pct_union_state', keep(master match) nogen //bringing in the data on percentage of the workforce in unions
	
	cap drop _merge
	sort state2 year quarter sector
	merge 1:m state2 year quarter sector using EPLpopA$name //merging in EPL and employment. note: employment is in 000's
	drop if _merge == 1 // DC only
	drop _merge
	
 * Make state-year trends
	levelsof(state2), local(states)
	qui foreach s in `states' {
		gen `s'_year = 0
		replace `s'_year = year if state2 == "`s'"
	}
	
 * Region-year and division-year dummies. There are 4 census regions and 9 census divisions. https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf 
	gen region = ""
	replace region = "New England" if division == "New England" | division == "Mid Atlantic"
	replace region = "Midwest" if division == "East North Central" | division == "West North Central"
	replace region = "South" if division == "South Atlantic" | division == "East South Central" | division == "West South Central"
	replace region = "West" if division == "Mountain" | division == "Pacific"

	egen year_div = group(division year)
	egen year_reg = group(region year)
	
	*We made # union accidents in the OSHA data, so make # non-union accidents
		gen num_nonunion_acc_insp = num_accident_insp - num_union_acc_insp
		
	*making ln(variables) using Matt's method
		*First, make Annual counts
			foreach v of varlist num_accident_insp num_complaint_insp num_prgm_insp num_fatal_acc_insp num_union_acc_insp num_nonunion_acc_insp {
				rename `v' `v'Q
				egen `v'Y = sum(`v'Q), by(state2 year sector_coarse) //for these variables we want total counts
			}
	
	/*
	*****************************
	*Delete this, as we no longer have these variables 
	****************************
			foreach v of varlist *_viol_prog gravity_prog {
				rename `v' `v'Q
				egen `v'Y = mean(`v'Q), by(state2 year sector_coarse) //for these variables we want average values
			}
	*/	
		*Annual and quarterly rates
		foreach t in Y Q {
			sum emp`t' if emp`t'>0, d        
			gen acc_rate`t'_matt = num_accident_insp`t' / (emp`t'+`r(p1)') 
			gen prgm_rate`t'_matt = num_prgm_insp`t'/(emp`t'+`r(p1)')
			gen complaint_rate`t'_matt = num_complaint_insp`t'/(emp`t'+`r(p1)')
			gen fatal_acc_rate`t'_matt = num_fatal_acc_insp`t'/(emp`t'+`r(p1)')
			gen total_viol_prog_rate`t'_matt = total_viol_prog`t'/(emp`t'+`r(p1)')
		}
		*To get rates for union/non-union accidents, need to have union/non-union employment
		foreach t in Y Q {
			gen union_emp`t' = emp`t'*pct_union_mem_private/100
			gen nonunion_emp`t' = emp`t'*(100-pct_union_mem_private)/100		

			sum union_emp`t' if union_emp`t'>0, d
			gen union_acc_rate`t'_matt = num_union_acc_insp`t'/(union_emp`t'+`r(p1)')
			
			sum nonunion_emp`t' if nonunion_emp`t'>0, d
			gen nonunion_acc_rate`t'_matt = num_nonunion_acc_insp`t'/(nonunion_emp`t'+`r(p1)')
		}

	*To take logs, add the first non-zero percentile, rather than adding 1. 
	*When the variable ranges from acc_rateY_matt between 0 and 1, adding one before taking log seems like the distributional gods will be angry
	foreach t in Y Q {
*		sum total_viol_prog_rate`t'_matt if total_viol_prog_rate`t'_matt >0, d
*			gen ln_total_viol_prog_rate`t'_matt = ln(`r(p1)'+total_viol_prog_rate`t'_matt)
		sum acc_rate`t'_matt if acc_rate`t'_matt >0, d
			gen ln_acc_rate`t'_matt = ln(`r(p1)'+acc_rate`t'_matt) 
		sum prgm_rate`t'_matt if prgm_rate`t'_matt >0, d
			gen ln_prgm_rate`t'_matt = ln(`r(p1)'+prgm_rate`t'_matt)
		sum complaint_rate`t'_matt if complaint_rate`t'_matt >0, d
			gen ln_complaint_rate`t'_matt = ln(`r(p1)'+complaint_rate`t'_matt)
*		sum fatal_acc_rate`t'_matt if fatal_acc_rate`t'_matt >0, d
*			gen ln_fatal_acc_rate`t'_matt = ln(`r(p1)'+fatal_acc_rate`t'_matt)
		sum union_acc_rate`t'_matt if union_acc_rate`t'_matt >0, d
			gen ln_union_acc_rate`t'_matt = ln(`r(p1)'+union_acc_rate`t'_matt)
		sum nonunion_acc_rate`t'_matt if nonunion_acc_rate`t'_matt >0, d
			gen ln_nonunion_acc_rate`t'_matt = ln(`r(p1)'+nonunion_acc_rate`t'_matt)
	}
	*Regressions will be weighted by each state's share of 1970 national employment for NSC, and by 1979 for OSHA data (first year of sample)
		gen zemp1970 = empY if year==1970 & quarter==1 // employment in 1970Q1 for a given state/sector
		egen emp1970 = max(zemp1970), by(state sector_coarse)	
		
	*Total US employment
		egen emp1970_national = sum(zemp1970), by(sector_coarse)

	*Each state/sector's share of 1979 employment
		gen sh_emp1970 = emp1970/emp1970_national

	*Each state's 1979 employment
		gen zemp1979 = empY if year==1979 & quarter==1 // employment in 1979Q1 for a given state/sector
		egen emp1979 = max(zemp1979), by(state sector_coarse)
	*Total US employment
		egen emp1979_national = sum(zemp1979), by(sector_coarse)
	*Each state/sector's share of 1979 employment
		gen sh_emp1979 = emp1979/emp1979_national		
		
	*Annual % of employment in mfg
		gen mfg_empY = empY if sector==2
		egen mfg_empY2 = max(mfg_empY), by(state2 year)
		gen total_empY = empY if sector==1
		egen state_year_emp = max(total_empY), by(state2 year) //this is total employment at the state-year level
		gen pct_emp_mfg = mfg_empY2/state_year_emp
			lab var pct_emp_mfg "Share of emp in mfg"
		sum pct_emp_mfg, d	
		drop mfg_empY* total_empY
		
	keep if month == 1 // we're only using annual data, not quarterly or monthly
	drop *Q* *quarter* yq //these are quarterly variables
	drop wdlag wdlap wdlac // these are monthly variables
	
	save EPLinjA$name, replace 
}

* creating a dataset that includes deaths from NSC and annual OSHA injuries
if 2==0 { 
		use nsc, clear
		cap drop _merge
		
		sort state year
		merge 1:1 state year using pop //this is population. We're using it to calculate non-work death rates
		
	*creating a "population in 1970" variable for weighting purposes in the non-work fatal accident regressions	
		gen zpop1970 = pop if year == 1970
		egen pop1970 = max(zpop1970), by(state)
	*calculating national 1970 population so we can look at share of population	
		egen pop1970_national = sum(zpop1970)
		gen sh_pop1970 = pop1970/pop1970_national

		order state2 year deaths
		sort state2 year
		
	*merge in the annual OSHA data
		merge 1:m state2 year using EPLinjA$name , gen(merge_osha2)
		cap drop _merge
		sort state2
	*Merge in the indicators of which states are OSHA state-run states
		merge m:1 state2 using ../data/state_run, nogen
	
	*Create the rate of deaths/accidents/complaints/etc
		sum state_year_emp if state_year_emp>0, d
			gen death_rate_matt = deaths / (state_year_emp+`r(p1)')				//note - this is state_year_emp because we want total employment for NSC
		sum pop if pop>0, d	
			gen public_rate_matt = publicNonMotorRaw / (pop +`r(p1)')			//population rather than labor force because we're looking at non-work deaths
			gen home_rate_matt = homeRaw / (pop + `r(p1)')
			gen home_public_rate_matt = homePublicRaw / (pop + `r(p1)') 
		sum empY if empY>0, d
			gen acc_rate_matt = num_accident_inspY / (empY+`r(p1)')
			gen prgm_rate_matt = num_prgm_inspY/(empY+`r(p1)')
			gen complaint_rate_matt = num_complaint_inspY/(empY+`r(p1)')
			gen fatal_acc_osha_rate_matt = num_fatal_acc_inspY/(empY+`r(p1)')
			gen union_acc_osha_rate_matt = num_union_acc_inspY/(empY+`r(p1)')
			gen total_viol_prog_matt = total_viol_progY/(empY+`r(p1)')

	*To take logs, add the first non-zero percentile, rather than adding 1. 
	*When the variable ranges from between 0 and 1, adding one before taking log seems like the distributional gods will be angry
		local vars total_viol_prog_matt home_public_rate_matt public_rate_matt home_rate_matt death_rate_matt acc_rate_matt prgm_rate_matt complaint_rate_matt fatal_acc_osha_rate_matt union_acc_osha_rate_matt
		foreach v in `vars' {
			qui sum `v' if `v' >0, d
			gen ln_`v' = ln(`r(p1)'+`v')
		}
		
	save osha_nsc, replace
}

* merging in statute data, labeling some variables
if 2==0 { 
	*Spreadsheet on WB and WC statutes, as well as various measures of "stringency" of these statutes 
	insheet using "../data/mergedYears2020-3-30.csv", clear

	rename stateabb state2
	*In "days to file," "NTL" = no time listed 
	replace wbdaystofile = "" if wbdaystofile=="NTL"
	destring wbdaystofile, replace 

	keep state2 wbyear wcyear wbdaystofile wbcompensatory wbpunitive wccompensatory wcpunitive 
	count

	tempfile statutes 
	save `statutes', replace 

*Merge into the main dataset 

	use osha_nsc, clear
	drop if missing(state_code)
	
	merge m:1 state2 using `statutes', keep(master match) gen(merge_stat)
*Generate indicators for having WB or WC statute, as well as categories of "bite" of these statutes 
	gen wb = 0
	replace wb = 1 if year>=wbyear & missing(wbyear)!=1
	
	gen wc = 0
	replace wc = 1 if year>=wcyear & missing(wcyear)!=1
*Measures of stringency of each statute 
	local lets b c

	foreach let in `lets' {
		*includes punitive damanges 
			gen w`let'_pun = w`let'==1 & w`let'punitive==1
			gen w`let'_nopun = w`let'==1 & w`let'punitive!=1
		*includes compensatory damages 
			gen w`let'_compen = w`let' ==1 & w`let'compensatory==1
			gen w`let'_nocompen = w`let' ==1 & w`let'compensatory!=1
	}
	
	sort state year 
	cap drop _merge
	merge state year using ../data/gov2 //Data on which party controls governership in which years. It comes from Klarner (2013) - https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/20408.

	label var dem_gov "Democratic governor"
	label var wdlapY "Public policy exception"
	label var wdlacY "Implied contract exception"
	label var wdlagY "Good faith exception"
	label var wc "Workers' comp statute"
	label var wb "Whistleblower statute"
	label var ln_prgm_rateY_matt "Prog. inspection rate"
	label var unemp "State unemployment rate"
	
	local t "Y"
	local vars total_viol_prog death_rate acc_rate prgm_rate complaint_rate fatal_acc_osha_rate union_acc_osha_rate
	foreach v in `vars' {
		gen asinh_`v'`t'_matt = asinh(`v'_matt) 
	}	
	
	egen statesec = group(state_num sec)
	egen secyear = group(sec year)

	save osha_nsc2, replace	
}


**********************************************************************************************************************************************************
* Part 3: regressions 
**********************************************************************************************************************************************************

global final 2005 //The last change in any common law is 1994. The last change in public policy common law is 1993. The last anti WC-retaliation change is 1998. The last whistleblower retaliation change is during 2003 (so shows up for us in 2004). We're using up to 2005.


**********************************************************************************************************************************************************
* Figure 1: the number of states which had passed public policy exception, whistleblower statute, and workers' comp statute over time. Figure 2: same but number of states reporting OSHA and NSC data.
**********************************************************************************************************************************************************

if 1==0 { 

	use osha_nsc2, clear
	keep if sector_coarse == 1 & year <= $final & month == 1
	
	*Indicator that a state has injury data from OSHA and from NSC
		gen osha_yes = ln_acc_rateY_matt!=. &  ( (state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) 
		gen nsc_yes = ln_death_rate_matt!=.
		
	collapse (sum) wdlapY  wb wc wdlag wdlac osha_yes nsc_yes, by(year)
		
	*Make graphs	
//	graph set window fontface "Centaur"
	graph set window fontface "default" //Centaur was causing issues for me (Dan)
	*PP/WC/WB adoption over time --- This is Figure 1
	twoway (line wdlapY  wc wb year, lp( solid dash  dash_dot) lc(black gs2 gs4) ) , ///
		xtitle("Year") ytitle("Number of States Adopting Protection", height(5) size(medsmall))  graphregion(fcolor(white)) /// 
		text(38 1981.5 "Public policy" "exception", place(e)) ///
		text(21 1988 "Workers'" "compensation" "statute", place(e)) ///
		text(37.75 1998 "Whistleblower" "statute", place(e)) ///
		legend(off)
	graph export ../regression_results/PP_WC_WB_time.pdf, replace 

	*% of states with OSHA/NSC data --- This is Figure D.2
		twoway (line osha_yes nsc_yes year , lp( dash solid )  ), ///
				xtitle("Year") ytitle("Number of States Reporting data", height(5) size(medsmall))  graphregion(fcolor(white)) /// 
				text(38 1992 "OSHA", place(e)) ///
				text(41.5 1975 "NSC", place(e)) ///
				legend(off)
		
		graph export ../regression_results/OSHA_NSC_time.pdf, replace	
}


**********************************************************************************************************************************************************
* Figure D.1: Make OSHA jurisdiction map 
**********************************************************************************************************************************************************

if 1==0 { 
	ssc install maptile, replace
	maptile_install using "http://files.michaelstepner.com/geo_state.zip"
	use osha_nsc2, clear

	keep if year==1990 & sector_coarse==1
	drop state
	clonevar state = state2
	maptile state_run_office, geography(state) rangecolor(gray*0.1 blue*1.2) twopt(legend(off))
	
	graph export  ../regression_results/osha_jurisdiction_map.pdf, replace
}


**********************************************************************************************************************************************************
*Table 1: Summary statistics table
**********************************************************************************************************************************************************

if 1==0 {  // Table 1 
	use osha_nsc2, clear
	keep if year <= $final
	label var acc_rateY_matt "Accident inspections per 1000 employees (OSHA)"
	label var death_rate "Deaths per 1000 employees (NSC)"
	label var wdlapY  "Public policy exception"
	label var wc "Workers' comp. anti-retaliation statute"
	label var wb "Whistleblower protection statute"
	local vars = "acc_rateY_matt death_rate wdlapY wc wb" 
	estpost su `vars', detail
	est store C1
	esttab C1  using "../regression_results/sum.tex", ///
		label fragment replace nonum cells("mean(fmt(3)) sd(fmt(3))") nomtitles noobs 	
}



**********************************************************************************************************************************************************
*Tables 2, 3, D.13. LHS = OSHA accident inspections, NSC workplace deaths, or NSC nonwork deaths. RHS = various combinations of statutes, exceptions, and controls. Three tables, each with 8 columns.
**********************************************************************************************************************************************************

if 1==0 { 
	eststo clear
	use osha_nsc2, clear
	keep if year <= $final
	
	global nonwork = "home_public" 
	
	// Next two lines make sure that workplace death sample is consistent across FE specs
	reghdfe ln_death_rate_matt wdlapY dem_gov $weight if sector == 1, absorb(year_div state_code state_code#c.year) vce(cluster state_code) //this line of code is to get a consistent sample for the different FE specs with NSC
	gen insample_nsc = 1 if e(sample)
	
	// Same as above but for non-work deaths
	reghdfe ln_${nonwork}_rate_matt wdlapY dem_gov $weight if sector == 1, absorb(year_div state_code state_code#c.year) vce(cluster state_code) //this line of code is to get a consistent sample for the different FE specs with NSC
	gen insample_nonwork = 1 if e(sample)
	
	global policies = "wb wc wdlagY wdlacY"
	
	foreach i in 1 3 4 {
		if `i' == 1 { // Accident rates (OSHA) is LHS variable
			global depvar = "ln_acc_rateY_matt"
			global prog = "ln_prgm_rateY_matt"
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1" 
		}
		if `i' == 3 { // Workplace deaths rates (NSC) is LHS variable
			global depvar =  "ln_death_rate_matt"
			global sample = "sector==1 & insample_nsc==1"
			global prog = "ln_prgm_rateY_matt"
			global dem_gov = "dem_gov"
			global unemp = ""
			global weight = "[aw = sh_emp1970]"
		}
		if `i' == 4 { // Non-work death rates (NSC) is LHS variable
			global depvar = "ln_${nonwork}_rate_matt" 
			global sample = "sector==1 & insample_nonwork==1"
			global prog = "ln_prgm_rateY_matt"
			global dem_gov = "dem_gov"
			global unemp = ""
			global weight = "[aw = sh_pop1970]" 
		}
			global trend = ""
			global trend_label = "No"
			global year_area = "" 
			global yr_label = "No" 			
			reghdfe $depvar wdlapY if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "No"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wb if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wdlapY wb wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = ""
			global trend_label = "No"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = "state_code#c.year"
			global trend_label = "Yes"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "test" // "$trend_label"
				estadd local area_year "$yr_label" 				
		if `i' == 1 {
			global table_name = "main_osha_$name" // Table 2
		}
		if `i' == 3 {
			global table_name = "main_nsc_$name" // Table 3
		} 
		if `i' == 4 {
			global table_name = "main_${nonwork}_$name" // Table D.13
		} 
	
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE") ///
			se b(3) order(wdlapY  wb wc wdl* $unemp $dem_gov $prog) keep(wdlapY  wb wc wdl* $unemp $dem_gov $prog) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
	
	}
}


**********************************************************************************************************************************************************
*Table D.6. Table 2, but with observations are at the sector-state-year level
**********************************************************************************************************************************************************

if 1==0 { 

	eststo clear
	use osha_nsc2, clear
	keep if year <= $final
	
	global fe = "statesec secyear"
	global policies = "wb wc wdlagY wdlacY"
	
	foreach i in 1 { 
		if `i' == 1 { // Accident rates (OSHA) is LHS variable
			global depvar = "ln_acc_rateY_matt"
			global prog = "ln_prgm_rateY_matt"
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector ~= 1" // sector ~= 1 means we are including manu and non-manu but not "overall"
		}
			global trend = ""
			global trend_label = "No"
			global year_area = "" 
			global yr_label = "No" 			
			reghdfe $depvar wdlapY if ($sample) $weight, absorb($year_area $fe $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wb if ($sample) $weight, absorb($year_area $fe $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wc if ($sample) $weight, absorb($year_area $fe $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wdlapY wb wc if ($sample) $weight, absorb($year_area $fe $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area $fe $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = ""
			global trend_label = "No"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area $fe $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = "state_code#c.year"
			global trend_label = "Yes"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area $fe $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
		if `i' == 1 {
			global table_name = "sec_state_$name"
		}
	esttab using "../regression_results/$table_name.tex", ///
		booktabs fragment replace scalars("basicFE Sector-state and sector-year FE" "state_trend State trend" "area_year Division-year FE") ///
		se b(3) order(wdlapY  $policies $unemp dem_gov $prog) keep(wdlapY  $policies $unemp dem_gov $prog) star(* .1 ** .05 *** .01)   label nodepvar ///
		nonotes nomtitles  	
	}
}


**********************************************************************************************************************************************************
* Event study figures. Figure 2a (LHS = OSHA, RHS = PPE), Figure 2b (LHS = OSHA, RHS = WB), D.3 (LHS = OSHA, RHS = WC), D.4 (LHS = NSC, RHS = PPE), D.5 (LHS = NSC, RHS = WB), D.6 (LHS = NSC, RHS = WC), D.8 (LHS = programmed inspection rate, RHS = PPE), D.9 (LHS = programmed inspection rate, RHS = WB) 
**********************************************************************************************************************************************************

if 1==1 { 

	//Note: 10/20/2020: we are keeping the "passed long in future term" term. Andrew Goodman-Bacon: "Bin up the end points (see recode statements above), and estimate those coefs, but do not report them" - https://twitter.com/agoodmanbacon/status/1165643402999926784

	global prog = "ln_prgm_rate_matt"
	global dem_gov = "dem_gov"
	global controls= "controlsY"
	
	foreach l of numlist 5 { //number of lags in event study graph
		local f = 5 		//number of leads
		eststo clear
		foreach fe in "Y" { //both trend and division-year FE or neither - currently only using "Y"
		foreach lhs in "NSC" "OSHA" "prgm" {
			if "`lhs'" == "OSHA" {
				global unemp = "unemp" //unemployment in the OSHA and programmed inspection event study but not NSC
				global axis = "Effect on workplace safety - OSHA"
			}
			else if "`lhs'" == "prgm" {
				global unemp = "unemp" 
				global axis = "Effect on programmed inspections"
			}
			else {
				global unemp = ""
				global axis = "Effect on workplace safety - NSC"
			}
			
			foreach rhs in "wb" "wdlapY" "wc" { 
				
				use osha_nsc2, clear
				foreach v in wdlapY wdlacY wdlagY { // statutes are currently coded as 0, 1/12, 2/12, etc. for the number of months in a year they were in operation. We want WDL data to be 0/1 for event studies, where passing in June 1980 turns on in 1981, and passing in July 1980 turns on in 1980
					replace `v' = 0 if `v' < .519999
					replace `v' = 1 if `v' >= .52 & !missing(`v')
				}

				keep if sector == 1
				global var = "`rhs'"
				global lhs = "`lhs'"
				
				if "`lhs'" == "OSHA" {
					global weight = "[aw = sh_emp1979]"
					global sample = "(((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992))) & year <= $final" 	
					global depvar = "ln_acc_rateY_matt"
					global prog = "ln_prgm_rate_matt"
				}
				if "`lhs'" == "NSC" {
					global depvar = "ln_death_rate_matt" 
					global weight = "[aw = sh_emp1970]"
					global prog = "ln_prgm_rate_matt"
				 // Next three lines make sure that workplace death sample is consistent across FE specs
					reghdfe ln_death_rate_matt wdlapY dem_gov $weight if sector == 1, absorb(year_div state_code state_code#c.year) vce(cluster state_code) 
					gen insample_nsc = 1 if e(sample)
					global sample = "insample_nsc==1"
				}	
				if "`lhs'" == "prgm" {
					global depvar = "ln_prgm_rateY_matt"
					global sample = "(((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992))) & year <= $final" 	
					global weight = "[aw = sh_emp1979]"
					global prog = ""
				}
				if "`rhs'" == "wdlapY" { //These are the control policies
					global policies = "wdlagY wdlacY wb wc" 
					global var_name = "public policy exception"
				}
				if "`rhs'" == "wb" {
					global policies = "wdlagY wdlacY wdlapY wc"
					global var_name = "whistleblower statute"
				}
				if "`rhs'" == "wc" {
					global policies = "wdlagY wdlacY wdlapY wb"
					global var_name = "workers' compensation statute"
				}
				
				global l = `l'
				global f = `f'

				sort state_num year
				tsset state_num year
					
				sort state_num year
				foreach i of numlist 0/$l { //for example l_3 = 1 if the law was changed 3 years in the past
					gen l_`i' = dl`i'.$var
					label var l_`i' "`i'"
				}
				foreach i of numlist $f/1 { //for example f_3 = 1 if the law was changed 3 years in the future
					gen f_`i' = df`i'.$var
					label var f_`i' "-`i'"
				}

				gen long_ago = l`=$l+1'.$var //long_ago means the law was passed more than 5 years ago. We can just look at the status 6 years ago because policies have not been recinded.
				
				****** This section corrects for the fact that our dataset starts in 1970 but we have data that is earlier than that ******
				foreach v in l_0 l_1 l_2 l_3 l_4 l_5 long_ago { 
					replace `v' = 0 if `v' == . & year <= 1975
				}
				if "$var" == "wdlapY" { //fixing CA
					replace long_ago = 1 if state2 == "CA" //CA adopted PPE in 1959, all others were significantly later
				}
				if "$var" == "wc" { // wc is treated separately because it has states that changed close to the threshold
					replace long_ago = 1 if year - wcyear >= 6 & wcyear < . //wcyear is the year in which wc was first passed
					replace l_5 = 1 if year - wcyear == 5 & wcyear < .
					replace l_4 = 1 if year - wcyear == 4 & wcyear < .
					replace l_3 = 1 if year - wcyear == 3 & wcyear < .
					replace l_2 = 1 if year - wcyear == 2 & wcyear < .
					replace l_1 = 1 if year - wcyear == 1 & wcyear < .
				}
				******** End section fixing pre-1970 data ***********
						
				//The purpose of the next 4 lines is to create long_forward, which 1 if $var was passed at least 6 years in the future and 0 otherwise
				foreach i of numlist 30/1 { //For example ff_8 = 1 if the policy was enacted 8 years in the future.
					gen ff_`i' = df`=`i'-1'.$var 
				}
				egen long_forward = rowtotal(ff_30-ff_`=$f+2') 
				
				*Fixing the coefficient for 1 lead at 0, as is common in literature. 
				replace f_1 = 0

				global trend = "`fe'"  // if `fe' == "Y" then we do with trend and division-year FE. If `fe' == "N" then we do without.
				global yd = "`fe'"
				
				if "$trend" == "Y" {
					global reg_trend = "c.year#state_code"
				}
				else {
					global reg_trend = ""
				}
				if "$yd" == "Y" {
					global reg_yd = "year_div"
				}
				else {
					global reg_yd = ""
				}
				
				reghdfe $depvar long_ago long_forward l_* f_* $unemp $prog $dem_gov if ($sample) $weight , absorb(state_code year $reg_trend $reg_yd $policies)  vce(cluster state_code)

				coefplot, keep(l_* f_*) omitted order(f_* l_*) vertical level(95) baselevels xtitle(Years before and after adoption of $var_name, size(medlarge)) ///
					ytitle("$axis", size(medlarge)) yline(0, lc(gs12)) graphregion(fcolor(white))  xlab(,angle(horizontal)) mc(gs4) ciopts(lc(gs8)) ///
					ysize(11) xsize(20) ylabel(,labsize(medlarge)) xlabel(,labsize(medlarge))

				graph export ../regression_results/event_${lhs}_${controls}_${var}_${f}_${l}_trend${trend}_div-year${yd}_${name}.pdf, replace	
				
				if "`lhs'" == "NSC" & "`rhs'" == "wdlapY" { //no controls, no trend, no region-year with NSC/PPE spec for the letter
					
					reghdfe $depvar long_ago long_forward l_* f_* /*$unemp $prog $dem_gov*/ if ($sample) $weight , absorb(state_code year /*$reg_trend $reg_yd $policies*/)  vce(cluster state_code)

					coefplot, keep(l_* f_*) omitted order(f_* l_*) vertical level(95) baselevels xtitle(Years before and after adoption of $var_name, size(medlarge)) ///
						ytitle("$axis", size(medlarge)) yline(0) graphregion(fcolor(white))  xlab(,angle(horizontal)) ///
						ysize(11) xsize(20) ylabel(,labsize(medlarge)) xlabel(,labsize(medlarge))

					graph export ../regression_results/event_${lhs}_controlsN_${var}_${f}_${l}_trend${trend}_div-year${yd}_${name}.pdf, replace	
				} 
			}	
		}
	}
	}
	
} // i ==1 


**********************************************************************************************************************************************************
*Figure D.7: Testing the effect of different end dates on the main results (ending between 1992 and 2005)
**********************************************************************************************************************************************************

if 1==0 { 
	eststo clear
	use osha_nsc2, clear
	keep if year <= $final
	
	global policies = "wb wc wdlagY wdlacY"
	global list = ""
	
	global depvar = "ln_acc_rateY_matt" // Accident rates (OSHA) is LHS variable
	global prog = "ln_prgm_rateY_matt"
	global unemp = "unemp"
	global dem_gov = "dem_gov"
	global weight = "[aw = sh_emp1979]"

	foreach j of numlist 4 {			
		foreach y of numlist 1992/2005 {
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1 & year <= `y'" 
			if `j' == 1 {
				global trend = ""
				global trend_label = "No"
				global year_area = "" 
				global yr_label = "No" 
			}
			if `j' == 2 {
				global trend = ""
				global trend_label = "No"
				global year_area = "year_div" 
				global yr_label = "Yes" 
			}
			if `j' == 3 {
				global trend = "state_code#c.year"
				global trend_label = "Yes"
				global year_area = "" 
				global yr_label = "No" 
			}
			if `j' == 4 {
				global trend = "state_code#c.year"
				global trend_label = "Yes"
				global year_area = "year_div" 
				global yr_label = "Yes" 
			}
				gen c_`y'_`j' = wdlapY 
				label var c_`y'_`j' "`y'"
				
				reghdfe $depvar c_`y'_`j' $policies if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)				
					eststo e_`y'_`j'
					estadd local basicFE "Yes"
					estadd local state_trend "$trend_label"
					estadd local area_year "$yr_label" 
					global list = "$list e_`y'_`j'"			//$list keeps a running list of the regression names so that we can show the graph later
			}
			coefplot $list, keep(c_*) nokey // each regression with a different ending year is arranged stack on top of each other. Each dot is a point estimate and each line is a 95CI.
			global list=""
			graph export ../regression_results/start_years_`j'.pdf, replace	
	}
}


**********************************************************************************************************************************************************
* Table D.4: Duplicating Table 2 but with only federal OSHA
**********************************************************************************************************************************************************

if 1==0 { 
	eststo clear
	use osha_nsc2, clear
	keep if year <= $final
	keep if state_run_office!=1 //only federal OSHA
	
	global policies = "wb wc wdlagY wdlacY"
	
	foreach i in 1 { //1 for osha
		if `i' == 1 { // Accident rates (OSHA) is LHS variable
			global depvar = "ln_acc_rateY_matt"
			global prog = "ln_prgm_rateY_matt"
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1" 
		}
			global trend = ""
			global trend_label = "No"
			global year_area = "" 
			global yr_label = "No" 			
			reghdfe $depvar wdlapY if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wb if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wdlapY wb wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = ""
			global trend_label = "No"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = "state_code#c.year"
			global trend_label = "Yes"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
		if `i' == 1 {
			global table_name = "fed_osha"
		}
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE") ///
			se b(3) order(wdlapY  wb wc wdl* $unemp $dem_gov $prog) keep(wdlapY  wb wc wdl* $unemp $dem_gov $prog) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
	}
}

**********************************************************************************************************************************************************
* Table D.12. Duplicating Table 2 but with with programmed inspection rate as the LHS
**********************************************************************************************************************************************************


if 1==0 { 
	eststo clear
	use osha_nsc2, clear
	keep if year <= $final
	
	global policies = "wb wc wdlagY wdlacY"
	
	foreach i in 1 { //1 for osha
		if `i' == 1 { // Accident rates (OSHA) is LHS variable
			global depvar = "ln_prgm_rateY_matt" //Programmed inspection rate is LHS
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1" 
		}
			global trend = ""
			global trend_label = "No"
			global year_area = "" 
			global yr_label = "No" 			
			reghdfe $depvar wdlapY if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wb if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wdlapY wb wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = ""
			global trend_label = "No"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = "state_code#c.year"
			global trend_label = "Yes"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
		if `i' == 1 {
			global table_name = "lhs_prog"
		}
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE") ///
			se b(3) order(wdlapY  wb wc wdl* $unemp $dem_gov) keep(wdlapY  wb wc wdl* $unemp $dem_gov) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
	}
}


**********************************************************************************************************************************************************
* Table D.5: "Donut" specification, similar to Table 2. We drop those observations t=-1,0,+1 relative to changes in PPE or WB
**********************************************************************************************************************************************************

if 1==0 { 
	eststo clear
	use osha_nsc2, clear
	
	keep if year <= $final
	keep if sector == 1
	tsset state_num year
	
	foreach v in wdlapY wdlacY wdlagY { // statutes are currently coded as 0, 1/12, 2/12, etc. for the number of months in a year they were in operation. We want WDL data to be 0/1 for event studies, where passing in June 1980 turns on in 1981, and passing in July 1980 turns on in 1980
		replace `v' = 0 if `v' < .519999
		replace `v' = 1 if `v' >= .52 & !missing(`v')
	}

		global trend = "state_code#c.year"
		global year_area = "year_div" 
		
		global depvar = "ln_acc_rateY_matt"
		global prog = "ln_prgm_rateY_matt"
		global unemp = "unemp"
		global dem_gov = "dem_gov"
		global weight = "[aw = sh_emp1979]"
		global policy = "wdlapY"
		global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1 & d.$policy == 0 & dl.$policy == 0 & df.$policy == 0" //do not include observations where PPE/WB changes in t=-1,0,+1
		
	//This is only to get a consistent sample across the FE specs
		reghdfe $depvar wdlapY wb wc wdlagY wdlacY $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
		gen insa = 1 if e(sample)

	foreach i in 1 { //Note: currently only using wdlapY
		if `i' == 1 {
			global policy = "wdlapY"
		}
		if `i' == 2 {
			global policy = "wb"
		}
		if "$policy" == "wb" {
			global policies = "wdlapY wc wdlagY wdlacY"
		}
		if "$policy" == "wdlapY " {
			global policies = "wb wc wdlagY wdlacY"
		}		
			global depvar = "ln_acc_rateY_matt"
			global prog = "ln_prgm_rateY_matt"
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "insa == 1 & ((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1 & d.$policy == 0 & dl.$policy == 0 & df.$policy == 0" //do not include observations where PPE/WB changes in t=-1,0,+1

			global trend = ""
			global trend_label = "No"
			global year_area = "" 
			global yr_label = "No" 			
			reghdfe $depvar wdlapY if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wb if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			reghdfe $depvar wdlapY wb wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $dem_gov $unemp $prog if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = ""
			global trend_label = "No"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $dem_gov $unemp $prog if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = "state_code#c.year"
			global trend_label = "Yes"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $dem_gov $unemp $prog if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
		if `i' == 1 {
			global table_name = "donut_$policy"
		}
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE") ///
			se b(3) order(wdlapY  wb wc wdl* $unemp $dem_gov $prog) keep(wdlapY  wb wc wdl* $unemp $dem_gov $prog) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
	}
}

**********************************************************************************************************************************************************
* Tables D.8 and D.9. Similar to Table 2. Allowing labor force to enter flexibly, with ln(accidents) = policy + ln(labor force). In Table D.8 the LHS variable is OSHA and in D.9 it is NSC. In Table 2 it is ln(accidents/employment) = policy. 
**********************************************************************************************************************************************************

if 1==0 {
	use osha_nsc2, clear

	keep if sector == 1
	keep if year <= $final
	
	* I need to calculate ln(accidents) in the same manner as above
	sum num_accident_inspY if num_accident_inspY>0, d
	gen lnacc = ln(num_accident_inspY + r(p1)) 
	
	sum deaths if deaths>0, d
	gen lndeath = ln(deaths+r(p1)) 
	
	gen lnemp = ln(empY)
	label var lnemp "Log of employment"
	
	tsset state_num year
	
	* The next 5 lines is just to get a consistent sample across the NSC specs
	global trend = "state_code#c.year"
	global year_area = "year_div" 
	
	reghdfe lndeath wdlapY lnemp if (sector == 1), absorb($year_area state_code year $trend) vce(cluster state_code)
	cap drop insa
	gen insa = 1 if e(sample)
	
	foreach i in 1 2 {
		global policy = "wdlapY "
		global policies = "wb wc wdlagY wdlacY"
		if `i' == 1 { // Accident rates (OSHA) is LHS variable
			global table_name = "flex_pop_osha"
			global depvar = "lnacc"
			global prog = "ln_prgm_rateY_matt"
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1"
		}
		if `i' == 2 { // Workplace deaths rates (NSC) is LHS variable
			global table_name = "flex_pop_nsc"
			global depvar = "lndeath"
			global prog = "ln_prgm_rateY_matt"
			global unemp = ""
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1970]"
			global sample = "sector == 1 & insa == 1"
		}
	
		global trend = ""
		global trend_label = "No"
		global year_area = "" 
		global yr_label = "No" 			
		reghdfe $depvar wdlapY lnemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
			eststo 
			estadd local basicFE "Yes"
			estadd local state_trend "$trend_label"
			estadd local area_year "$yr_label" 				
		reghdfe $depvar wb lnemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
			eststo 
			estadd local basicFE "Yes"
			estadd local state_trend "$trend_label"
			estadd local area_year "$yr_label" 	
		reghdfe $depvar wc lnemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
			eststo 
			estadd local basicFE "Yes"
			estadd local state_trend "$trend_label"
			estadd local area_year "$yr_label" 	
		reghdfe $depvar wdlapY wb wc lnemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
			eststo 
			estadd local basicFE "Yes"
			estadd local state_trend "$trend_label"
			estadd local area_year "$yr_label" 				
		reghdfe $depvar wdlapY wb wc wdlagY wdlacY lnemp $dem_gov $unemp $prog if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
			eststo 
			estadd local basicFE "Yes"
			estadd local state_trend "$trend_label"
			estadd local area_year "$yr_label" 	
		global trend = ""
		global trend_label = "No"
		global year_area = "year_div" 
		global yr_label = "Yes" 			
		reghdfe $depvar wdlapY wb wc wdlagY wdlacY lnemp $dem_gov $unemp $prog if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
			eststo 
			estadd local basicFE "Yes"
			estadd local state_trend "$trend_label"
			estadd local area_year "$yr_label" 				
		global trend = "state_code#c.year"
		global trend_label = "Yes"
		global year_area = "year_div" 
		global yr_label = "Yes" 			
		reghdfe $depvar wdlapY wb wc wdlagY wdlacY $dem_gov lnemp $unemp $prog if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
			eststo 
			estadd local basicFE "Yes"
			estadd local state_trend "$trend_label"
			estadd local area_year "$yr_label" 		
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE") ///
			se b(3) order($policy lnemp $policies $unemp $dem_gov $prog) keep($policy lnemp $policies $unemp $dem_gov $prog) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
			eststo clear
	}	
}	 

**********************************************************************************************************************************************************
* Table D.7. Similar to Table 2. Splitting the sample into manufacturing and non-manu.
**********************************************************************************************************************************************************

if 1==0 {
	eststo clear
	use osha_nsc2, clear
	keep if year <= $final
			
	global policies = "wb wc wdlagY wdlacY"
	
	foreach i in 1 { 
		if `i' == 1 { 
			global depvar = "ln_acc_rateY_matt"
			global prog = "ln_prgm_rateY_matt"
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector != 1" 
		}
			global trend = ""
			global trend_label = "No"
			global year_area = "" 
			global yr_label = "No" 			
			reghdfe $depvar wdlapY if ($sample  & sector == 2) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
				estadd local sector "Manu."
			reghdfe $depvar wdlapY if ($sample  & sector == 3) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
				estadd local sector "Non-m."
			reghdfe $depvar wb if ($sample  & sector == 2) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
				estadd local sector "Manu."
			reghdfe $depvar wb if ($sample  & sector == 3) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
				estadd local sector "Non-m."
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample & sector == 2) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
				estadd local sector "Manu."
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample & sector == 3) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
				estadd local sector "Non-m."
			global trend = "state_code#c.year"
			global trend_label = "Yes"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample & sector == 2) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 		
				estadd local sector "Manu."
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample & sector == 3) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 		
				estadd local sector "Non-m."
		if `i' == 1 {
			global table_name = "manu_not_osha"
		}
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE" "sector Sector") ///
			se b(3) order(wdlapY  wb wc wdl* $unemp $dem_gov $prog) keep(wdlapY  wb wc wdl* $unemp $dem_gov $prog) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
	}
}


**********************************************************************************************************************************************************
* Table D.1: similar to Table 2. Dependent variable is manufacturing's share of employment.
**********************************************************************************************************************************************************

if 1==0	 { 
	eststo clear
	use osha_nsc2, clear
	
	gen lnmanu = ln(pct_emp_mfg)
	keep if year <= $final

	foreach i in 1 { 
		if `i' == 1 { 
			global table_name = "pct_manu" 
			global depvar = "pct_emp_mfg"
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1" 
		}
			global trend = ""
			global trend_label = "No"
			global year_area = "" 
			global yr_label = "No" 			
			reghdfe $depvar wdlapY if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend  "$trend_label"
				estadd local area_year "$yr_label" 
				
				sum $depvar
				estadd scalar mean = r(mean)
				
			reghdfe $depvar wb if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend  "$trend_label"
				estadd local area_year "$yr_label" 	
				sum $depvar
				estadd scalar mean = r(mean)

			reghdfe $depvar wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend  "$trend_label"
				estadd local area_year "$yr_label" 	
				sum $depvar
				estadd scalar mean = r(mean)
				
			reghdfe $depvar wdlapY wb wc if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend  "$trend_label"
				estadd local area_year "$yr_label" 		
				sum $depvar
				estadd scalar mean = r(mean)
				
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 		
				sum $depvar
				estadd scalar mean = r(mean)
				
			global trend = ""
			global trend_label = "No"
			global year_area = "year_div" 
			global yr_label = "Yes" 			
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
				sum $depvar
				estadd scalar mean = r(mean)
				
			global trend = "state_code#c.year"
			global trend_label = "Yes"
			global year_area = "year_div" 
			global yr_label = "Yes" 						
			reghdfe $depvar wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp if ($sample) $weight, absorb($year_area state_code year $trend) vce(cluster state_code)
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 			
				sum $depvar
				estadd scalar mean = r(mean)				
		esttab using "../regression_results/$table_name.tex",  ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE" "mean Mean Dep Var") ///
			se b(3) order(wdlapY  wb wc wdl* $unemp $dem_gov $prog) keep(wdlapY  wb wc wdl* $unemp $dem_gov $prog) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
	}
}


**********************************************************************************************************************************************************
* Tables D.10 and D.11. Similar to Table 2. Poisson and negative binomial regressions. Note: this regression doesn't converge with division-year and state-trends so we omit the last column
**********************************************************************************************************************************************************

if 1==0 {
	eststo clear
	use osha_nsc2, clear
	
	keep if year <= $final & ((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1
	
	global depvar = "ln_acc_rateY_matt"
	global prog = "ln_prgm_rateY_matt"
	global unemp = "unemp"
	global dem_gov = "dem_gov"
	global weight = "[aw = sh_emp1979]"
	global policies = "wb wc wdlagY wdlacY"

	//trying to get poisson to converge, and apparently rescaling variables can help
	replace empY = empY * 1000 
	sum $prog
	replace $prog = $prog / r(mean) / 10
	sum $unemp
	replace $unemp = $unemp / r(mean)
	
	rename num_accident_inspY Accident_rate	

	foreach model in poisson nbreg {
		global model = "`model'"		

		label var Accident_rate "Accident rate (OSHA)"

			global table_name = "$model"
			global depvar = "pct_emp_mfg"
			global unemp = "unemp"
			global dem_gov = "dem_gov"
			global weight = "[aw = sh_emp1979]"
			global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1" 
			
			global trend = ""
			global trend_label = "No"
			global year_area = "" 
			global yr_label = "No" 			

			$model Accident_rate wdlapY i.state_num i.year $trend $year_area, exposure(empY) cluster(state_num) 
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			$model Accident_rate wb i.state_num i.year $trend $year_area, exposure(empY) cluster(state_num) 
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			$model Accident_rate wc i.state_num i.year $trend $year_area, exposure(empY) cluster(state_num) 
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 	
			$model Accident_rate wdlapY wb wc i.state_num i.year $trend $year_area, exposure(empY) cluster(state_num) 
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 			
			$model Accident_rate wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp i.state_num i.year $trend $year_area, exposure(empY) cluster(state_num) 
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 				
			global trend = ""
			global trend_label = "No"
			global year_area = "year_div" 
			global yr_label = "Yes" 		
			$model Accident_rate wdlapY wb wc wdlagY wdlacY $prog $dem_gov $unemp i.state_num i.year $trend $year_area, exposure(empY) cluster(state_num) 
				eststo 
				estadd local basicFE "Yes"
				estadd local state_trend "$trend_label"
				estadd local area_year "$yr_label" 						
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE") ///
			se b(3) order(wdlapY  wb wc wdl* $unemp $dem_gov $prog) keep(wdlapY  wb wc wdl* $unemp $dem_gov $prog) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
	}
}

*************************
*Tables D.15 and D.16: Do file to create tables assessing heterogeneity in effect of WB and WC statutes, based on damages 
*************************

if 1==0 { 

	global start 2005 
	
	use osha_nsc2, clear
	keep if year<=2005
	
	label var wb_pun "Whistleblower, punitive"
	label var wb_nopun "Whistleblower, no punitive"
	label var wc_pun "Workers' comp, punitive"
	label var wc_nopun "Workers' comp, no punitive"
	
	****************************
	*Main table: using OSHA data 
	****************************
	global weight = "[aw = sh_emp1979]"
	global sample = "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1" 

	global fes = "year state_code "
	global prog = "" 
	global unemp = "" 
	global dem_gov = "" 

	*state + year FE 	
		*WB: Punitive versus not 
			reghdfe ln_acc_rateY_matt  wb_nopun wb_pun $prog $unemp $dem_gov $weight if sector == 1 & $sample, absorb($fes) vce(cluster state_code) 
				test wb_nopun = wb_pun 
				eststo 
				local f : di %6.3f `r(p)'
				estadd local F_wb "`f'"
				estadd local basicFE "Yes"
				estadd local state_trend "No"
				estadd local area_year "No" 	
		*WC: Punitive versus not 
			reghdfe ln_acc_rateY_matt  wc_nopun wc_pun $prog $unemp $dem_gov $weight if sector == 1 & $sample, absorb($fes) vce(cluster state_code) 
				test wc_nopun = wc_pun 
				eststo 
				local f : di %6.3f `r(p)'
				estadd local F_wc "`f'"
				estadd local basicFE "Yes"
				estadd local state_trend "No"
				estadd local area_year "No" 	
		*Both WB and WC: Punitive versus not 
			reghdfe ln_acc_rateY_matt  wb_nopun wb_pun $prog $unemp $dem_gov  wc_nopun wc_pun $weight if sector == 1 & $sample, absorb($fes) vce(cluster state_code) 
				eststo 
				test wb_nopun = wb_pun  
				local f : di %6.3f `r(p)'
				estadd local F_wb "`f'"
				test wc_nopun = wc_pun 
				local f : di %6.3f `r(p)'
				estadd local F_wc "`f'"
				estadd local basicFE "Yes"
				estadd local state_trend "No"
				estadd local area_year "No" 	
	*Both WB and WC and all controls: + division-year FE 
			reghdfe ln_acc_rateY_matt  wb_nopun wb_pun  wc_nopun wc_pun $prog $unemp $dem_gov  $weight if sector == 1 & $sample, absorb($fes year_div) vce(cluster state_code) 
				eststo 
				test wb_nopun = wb_pun  
				local f : di %6.3f `r(p)'
				estadd local F_wb "`f'"
				test wc_nopun = wc_pun 
				local f : di %6.3f `r(p)'
				estadd local F_wc "`f'"
				estadd local basicFE "Yes"
				estadd local state_trend "No"
				estadd local area_year "Yes" 	
	*Both WB and WC and all controls: + division-year FE + state trends 
			reghdfe ln_acc_rateY_matt  wb_nopun wb_pun  wc_nopun wc_pun  $prog $unemp $dem_gov $weight if sector == 1 & $sample, absorb($fes year_div c.year#i.state_code) vce(cluster state_code) 
				eststo 
				test wb_nopun = wb_pun  
				local f : di %6.3f `r(p)'
				estadd local F_wb "`f'"
				test wc_nopun = wc_pun 
				local f : di %6.3f `r(p)'
				estadd local F_wc "`f'"
				estadd local basicFE "Yes"
				estadd local state_trend "Yes"
				estadd local area_year "Yes" 	
		global table_name = "het_osha"
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE" "F_wb F-test WB" "F_wc F-test WC") ///
			se b(3) order(wb_pun wb_nopun wc_pun wc_nopun $prog $unemp $dem_gov) keep(wb_pun wb_nopun wc_pun wc_nopun $prog $unemp $dem_gov) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
	
*instead use NSC data 
	global weight_nsc = "[aw = sh_emp1970]"
	reghdfe ln_death_rate_matt wdlapY  $weight_nsc if sector == 1, absorb(year_div state_code ) vce(cluster state_code) 
	gen insample_nsc = 1 if e(sample)		

	*state + year FE 	
		*WC: Punitive versus not 
			reghdfe ln_death_rate_matt wc_nopun wc_pun  $prog $dem_gov $weight_nsc if sector == 1 & insample_nsc == 1, absorb($fes) vce(cluster state_code) 
				mat m = r(table)
				mat list m
				test wc_nopun = wc_pun 
				eststo 
				local f : di %6.3f `r(p)'
				estadd local F_wc "`f'"
				di "`f'"
				estadd local basicFE "Yes"
				estadd local state_trend "No"
				estadd local area_year "No" 
				
	* WC + division-year FE 
			reghdfe ln_death_rate_matt wc_nopun wc_pun  $prog $dem_gov $weight_nsc if sector == 1 & insample_nsc == 1, absorb($fes year_div) vce(cluster state_code) 
				eststo 
				test wc_nopun = wc_pun 
				local f : di %6.3f `r(p)'
				estadd local F_wc "`f'"
				estadd local basicFE "Yes"
				estadd local state_trend "No"
				estadd local area_year "Yes" 	
	* WC + division-year FE + state trends 
			reghdfe ln_death_rate_matt  wc_nopun wc_pun  $prog $dem_gov $weight_nsc if sector == 1 & insample_nsc == 1 , absorb($fes year_div c.year#i.state_code) vce(cluster state_code) 
				eststo 
				test wc_nopun = wc_pun 
				local f : di %6.3f `r(p)'
				estadd local F_wc "`f'"
				estadd local basicFE "Yes"
				estadd local state_trend "Yes"
				estadd local area_year "Yes" 

		global table_name = "het_nsc"
		esttab using "../regression_results/$table_name.tex", ///
			booktabs fragment replace scalars("basicFE State FE" "state_trend State trend" "area_year Division-year FE" "F_wc F-test WC") ///
			se b(3) order(wc_pun wc_nopun  $prog $dem_gov ) keep(wc_pun wc_nopun $prog $dem_gov ) star(* .1 ** .05 *** .01)   label nodepvar ///
			nonotes nomtitles  	
		eststo clear
}		





timer off 1
timer list
cap log close

